################################################################################ 
#
# Replication code for:
# Empathy-based counterspeech can reduce racist hate speech
# in a social media field experiment
#
# PNAS 2021
# 
# 
# Additional Unpublished Analysis
# --------------------------------
# 
# We estimate the effect of treatment assignment on the probability that
# the user gets suspended, or that the account is set to private or deleted.
# We show that attrition is balanced across groups and is not driven by the
# treatment.
# 
################################################################################ 


# Clear the workspace first, THEN define the settings. In the original order
# (flag first, rm() second) `setsave` was wiped immediately after being
# created, so any later `if (setsave == ...)` check would fail with
# "object 'setsave' not found".
rm(list = ls())

# Save switch for the figure; read by the (currently commented-out) ggsave()
# call at the end of the script.
setsave <- FALSE

################################################################################ 
#  LIBRARIES
################################################################################ 

library(readxl)
library(xtable)
library(estimatr)
library(ggplot2)

################################################################################ 
#   DATA AND FOLDER
################################################################################ 

# Project root: two levels above the folder that contains this script.
# The script path comes from the RStudio editor, so this needs an RStudio
# session.
wd <- local({
  script_path <- rstudioapi::getActiveDocumentContext()$path
  dirname(dirname(dirname(script_path)))
})
wd_res <- paste0(wd, "/results")
wd_data <- paste0(wd, "/data")

# The read.csv() calls below use file names relative to the data folder.
setwd(wd_data)


# Attrition data: per the original authors, this file only covers the tweets
# that were lost, together with the reason.
att <- read.csv("attrition.csv")

# "banned" and "suspended" are treated as synonyms: fold banned into suspended.
is_banned <- att$banned == 1
att$suspended[is_banned] <- 1

# Drop the columns that are not used in the analysis below.
unused_cols <- c("NOTES", "acc_status", "treatment_main", "banned")
att[unused_cols] <- NULL

# Every remaining missing value is recoded to 0.
att[is.na(att)] <- 0

# Load the main dataset and keep only the tweet id, the pooled attrition
# indicator and the treatment-assignment columns.
data <- read.csv("main_dataset.csv")
data <- data[c("exp_id", "deleted_suspended", "ITT", "treatment_main", "treatment_dummy")]

# Left-join the attrition types onto the main data, keyed on the tweet id.
# base::merge() has no `on` argument (that is pandas syntax): the original
# `on = 'exp_id'` was swallowed by `...` and the join silently used every
# column name shared by both tables. The key is now stated explicitly via `by`.
shared_cols <- setdiff(intersect(names(data), names(att)), "exp_id")
if (length(shared_cols) > 0) {
  # The original call would have joined on these columns as well; with an
  # explicit key they are kept side by side with .x/.y suffixes instead.
  warning(
    "Columns other than 'exp_id' are shared by both tables: ",
    paste(shared_cols, collapse = ", "),
    call. = FALSE
  )
}
data <- merge(data, att, by = "exp_id", all.x = TRUE)

# Rows without a match in `att` come back as NA after the left join;
# set their attrition indicators to 0.
outvars <- c("deleted_suspended", "privated", "deleted", "suspended")
data[outvars][is.na(data[outvars])] <- 0


################################################################################
#   Separately for each outcome
################################################################################

# Pooled indicator: 1 if the account was set to private or the tweet was
# deleted, 0 otherwise.
data$self <- as.numeric(data$privated == 1 | data$deleted == 1)

# Outcomes to analyse and their plot labels. Each outcome contributes three
# rows to the results table (one per treatment label).
outvars <- c("self", "suspended")
outvars_name <- rep(
  c("Set to private\nor Deleted by User", "Suspended"),
  each = 3
)
Treatment <- rep(
  c("Humor", "Warning of\nConsequences", "Empathy"),
  times = length(outvars)
)

outcomes <- data[outvars]

# One lm_robust() fit per outcome: standardized outcome regressed on the
# treatment arm entered as a factor.
fit_outcome <- function(x) {
  lm_robust(scale(outcomes[, x]) ~ factor(data$treatment_main))
}
models <- lapply(seq_along(outvars), fit_outcome)
summaries <- lapply(models, summary)

# Keep coefficient rows 2-4 and columns 1, 2 and 4 of each summary table.
# NOTE(review): rows 2:4 are assumed to be the three non-reference treatment
# arms, in the same order as the labels in `Treatment` -- confirm against the
# coding of treatment_main. Columns 1/2/4 are presumably estimate, standard
# error and p-value (they are named coeff/se/pval further down).
keep_rows <- 2:4
keep_cols <- c(1, 2, 4)
coeffs <- lapply(summaries, function(s) s$coefficients[keep_rows, keep_cols])

# Stack the per-outcome coefficient tables into one long data frame
# (left fold starting from an empty data frame).
container <- Reduce(rbind.data.frame, coeffs, data.frame())
names(container) <- c("coeff", "se", "pval")

# Attach the outcome id, the treatment label and the outcome label to each row.
container <- cbind.data.frame(
  container,
  outvars = rep(outvars, each = 3),
  Treatment,
  outvars_name
)

# Factor version of the outcome label, with levels in order of first
# appearance rather than alphabetical order.
container$outvars.f <- factor(
  container$outvars_name,
  levels = unique(container$outvars_name)
)


# Coefficient plot: point estimates by outcome (y) and treatment (colour),
# with thin bars at +/- 1.96 SE and thick bars at +/- 1.645 SE.
# NOTE(review): geom_line() joins the (un-dodged) estimates of each treatment
# across the two outcomes -- confirm this layer is intended.
# NOTE(review): `size` on geom_errorbar() is deprecated in favour of
# `linewidth` in recent ggplot2 releases; kept as-is for compatibility.
dodge <- position_dodge(width = 1/2)

ggplot(container, aes(x = coeff, y = outvars.f, colour = Treatment)) +
  ggtitle("Effect of Different Treatments on Multiple Outcomes (std)") +
  geom_line() +
  geom_point(position = dodge) +
  geom_errorbar(
    aes(xmin = coeff - se * 1.96, xmax = coeff + se * 1.96),
    width = 0, position = dodge, size = .5
  ) +
  geom_errorbar(
    aes(xmin = coeff - se * 1.645, xmax = coeff + se * 1.645),
    width = 0, position = dodge, size = 1
  ) +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed") +
  labs(x = "", y = "") +
  theme_light() +
  theme(
    text = element_text(size = 23),
    strip.text.y = element_text(angle = 90),
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.title = element_blank()
  )


#if (setsave==T){ggsave(paste0(wd_res, '/attrition_by_treatment.png'), width=10, height = 10)}
